home *** CD-ROM | disk | FTP | other *** search
/ Komputer for Alle 2004 #2 / K-CD-2-2004.ISO / OpenOffice Sv / f_0397 / python-core-2.2.2 / lib / shlex.py < prev    next >
Encoding:
Python Source  |  2003-07-18  |  7.6 KB  |  210 lines

  1. """A lexical analyzer class for simple shell-like syntaxes."""
  2.  
  3. # Module and documentation by Eric S. Raymond, 21 Dec 1998
  4. # Input stacking and error message cleanup added by ESR, March 2000
  5. # push_source() and pop_source() made explicit by ESR, January 2001.
  6.  
  7. import os.path
  8. import sys
  9.  
  10. __all__ = ["shlex"]
  11.  
  12. class shlex:
  13.     "A lexical analyzer class for simple shell-like syntaxes."
  14.     def __init__(self, instream=None, infile=None):
  15.         if instream:
  16.             self.instream = instream
  17.             self.infile = infile
  18.         else:
  19.             self.instream = sys.stdin
  20.             self.infile = None
  21.         self.commenters = '#'
  22.         self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
  23.                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
  24.         self.whitespace = ' \t\r\n'
  25.         self.quotes = '\'"'
  26.         self.state = ' '
  27.         self.pushback = []
  28.         self.lineno = 1
  29.         self.debug = 0
  30.         self.token = ''
  31.         self.filestack = []
  32.         self.source = None
  33.         if self.debug:
  34.             print 'shlex: reading from %s, line %d' \
  35.                   % (self.instream, self.lineno)
  36.  
  37.     def push_token(self, tok):
  38.         "Push a token onto the stack popped by the get_token method"
  39.         if self.debug >= 1:
  40.             print "shlex: pushing token " + `tok`
  41.         self.pushback = [tok] + self.pushback
  42.  
  43.     def push_source(self, newstream, newfile=None):
  44.         "Push an input source onto the lexer's input source stack."
  45.         self.filestack.insert(0, (self.infile, self.instream, self.lineno))
  46.         self.infile = newfile
  47.         self.instream = newstream
  48.         self.lineno = 1
  49.         if self.debug:
  50.             if newfile:
  51.                 print 'shlex: pushing to file %s' % (self.infile,)
  52.             else:
  53.                 print 'shlex: pushing to stream %s' % (self.instream,)
  54.  
  55.     def pop_source(self):
  56.         "Pop the input source stack."
  57.         self.instream.close()
  58.         (self.infile, self.instream, self.lineno) = self.filestack[0]
  59.         self.filestack = self.filestack[1:]
  60.         if self.debug:
  61.             print 'shlex: popping to %s, line %d' \
  62.                   % (self.instream, self.lineno)
  63.         self.state = ' '
  64.  
  65.     def get_token(self):
  66.         "Get a token from the input stream (or from stack if it's nonempty)"
  67.         if self.pushback:
  68.             tok = self.pushback[0]
  69.             self.pushback = self.pushback[1:]
  70.             if self.debug >= 1:
  71.                 print "shlex: popping token " + `tok`
  72.             return tok
  73.         # No pushback.  Get a token.
  74.         raw = self.read_token()
  75.         # Handle inclusions
  76.         while raw == self.source:
  77.             spec = self.sourcehook(self.read_token())
  78.             if spec:
  79.                 (newfile, newstream) = spec
  80.                 self.push_source(newstream, newfile)
  81.             raw = self.get_token()
  82.         # Maybe we got EOF instead?
  83.         while raw == "":
  84.             if len(self.filestack) == 0:
  85.                 return ""
  86.             else:
  87.                 self.pop_source()
  88.                 raw = self.get_token()
  89.          # Neither inclusion nor EOF
  90.         if self.debug >= 1:
  91.             if raw:
  92.                 print "shlex: token=" + `raw`
  93.             else:
  94.                 print "shlex: token=EOF"
  95.         return raw
  96.  
  97.     def read_token(self):
  98.         "Read a token from the input stream (no pushback or inclusions)"
  99.         while 1:
  100.             nextchar = self.instream.read(1)
  101.             if nextchar == '\n':
  102.                 self.lineno = self.lineno + 1
  103.             if self.debug >= 3:
  104.                 print "shlex: in state", repr(self.state), \
  105.                       "I see character:", repr(nextchar)
  106.             if self.state is None:
  107.                 self.token = ''        # past end of file
  108.                 break
  109.             elif self.state == ' ':
  110.                 if not nextchar:
  111.                     self.state = None  # end of file
  112.                     break
  113.                 elif nextchar in self.whitespace:
  114.                     if self.debug >= 2:
  115.                         print "shlex: I see whitespace in whitespace state"
  116.                     if self.token:
  117.                         break   # emit current token
  118.                     else:
  119.                         continue
  120.                 elif nextchar in self.commenters:
  121.                     self.instream.readline()
  122.                     self.lineno = self.lineno + 1
  123.                 elif nextchar in self.wordchars:
  124.                     self.token = nextchar
  125.                     self.state = 'a'
  126.                 elif nextchar in self.quotes:
  127.                     self.token = nextchar
  128.                     self.state = nextchar
  129.                 else:
  130.                     self.token = nextchar
  131.                     if self.token:
  132.                         break   # emit current token
  133.                     else:
  134.                         continue
  135.             elif self.state in self.quotes:
  136.                 self.token = self.token + nextchar
  137.                 if nextchar == self.state:
  138.                     self.state = ' '
  139.                     break
  140.                 elif not nextchar:      # end of file
  141.                     if self.debug >= 2:
  142.                         print "shlex: I see EOF in quotes state"
  143.                     # XXX what error should be raised here?
  144.                     raise ValueError, "No closing quotation"
  145.             elif self.state == 'a':
  146.                 if not nextchar:
  147.                     self.state = None   # end of file
  148.                     break
  149.                 elif nextchar in self.whitespace:
  150.                     if self.debug >= 2:
  151.                         print "shlex: I see whitespace in word state"
  152.                     self.state = ' '
  153.                     if self.token:
  154.                         break   # emit current token
  155.                     else:
  156.                         continue
  157.                 elif nextchar in self.commenters:
  158.                     self.instream.readline()
  159.                     self.lineno = self.lineno + 1
  160.                 elif nextchar in self.wordchars or nextchar in self.quotes:
  161.                     self.token = self.token + nextchar
  162.                 else:
  163.                     self.pushback = [nextchar] + self.pushback
  164.                     if self.debug >= 2:
  165.                         print "shlex: I see punctuation in word state"
  166.                     self.state = ' '
  167.                     if self.token:
  168.                         break   # emit current token
  169.                     else:
  170.                         continue
  171.         result = self.token
  172.         self.token = ''
  173.         if self.debug > 1:
  174.             if result:
  175.                 print "shlex: raw token=" + `result`
  176.             else:
  177.                 print "shlex: raw token=EOF"
  178.         return result
  179.  
  180.     def sourcehook(self, newfile):
  181.         "Hook called on a filename to be sourced."
  182.         if newfile[0] == '"':
  183.             newfile = newfile[1:-1]
  184.         # This implements cpp-like semantics for relative-path inclusion.
  185.         if type(self.infile) == type("") and not os.path.isabs(newfile):
  186.             newfile = os.path.join(os.path.dirname(self.infile), newfile)
  187.         return (newfile, open(newfile, "r"))
  188.  
  189.     def error_leader(self, infile=None, lineno=None):
  190.         "Emit a C-compiler-like, Emacs-friendly error-message leader."
  191.         if not infile:
  192.             infile = self.infile
  193.         if not lineno:
  194.             lineno = self.lineno
  195.         return "\"%s\", line %d: " % (infile, lineno)
  196.  
  197.  
  198. if __name__ == '__main__':
  199.     if len(sys.argv) == 1:
  200.         lexer = shlex()
  201.     else:
  202.         file = sys.argv[1]
  203.         lexer = shlex(open(file), file)
  204.     while 1:
  205.         tt = lexer.get_token()
  206.         if tt:
  207.             print "Token: " + repr(tt)
  208.         else:
  209.             break
  210.